<!doctype html>
<title>document.characterSet (inputEncoding and charset as aliases) normalization tests</title>
<link rel=author title="Aryeh Gregor" href="mailto:ayg@aryeh.name">
<meta name=timeout content=long>
<div id=log></div>
<script src=/resources/testharness.js></script>
<script src=/resources/testharnessreport.js></script>
<style>iframe { display: none }</style>
<script>
"use strict";

// Maps each encoding name to the labels expected to resolve to it. The name
// (the object key) is the value the tests below compare against
// document.characterSet, inputEncoding and charset; the array holds the
// labels, all in lowercase.
// Labels are taken from https://encoding.spec.whatwg.org/ except for the
// deviations noted inline (utf-16* and x-user-defined).
var encodingMap = {
  "UTF-8": [
    "unicode-1-1-utf-8",
    "utf-8",
    "utf8",
    // As we use <meta>, utf-16 will map to utf-8 per
    // https://html.spec.whatwg.org/multipage/#documentEncoding
    "utf-16",
    "utf-16le",
    "utf-16be",
  ],
  "IBM866": [
    "866",
    "cp866",
    "csibm866",
    "ibm866",
  ],
  "ISO-8859-2": [
    "csisolatin2",
    "iso-8859-2",
    "iso-ir-101",
    "iso8859-2",
    "iso88592",
    "iso_8859-2",
    "iso_8859-2:1987",
    "l2",
    "latin2",
  ],
  "ISO-8859-3": [
    "csisolatin3",
    "iso-8859-3",
    "iso-ir-109",
    "iso8859-3",
    "iso88593",
    "iso_8859-3",
    "iso_8859-3:1988",
    "l3",
    "latin3",
  ],
  "ISO-8859-4": [
    "csisolatin4",
    "iso-8859-4",
    "iso-ir-110",
    "iso8859-4",
    "iso88594",
    "iso_8859-4",
    "iso_8859-4:1988",
    "l4",
    "latin4",
  ],
  "ISO-8859-5": [
    "csisolatincyrillic",
    "cyrillic",
    "iso-8859-5",
    "iso-ir-144",
    "iso8859-5",
    "iso88595",
    "iso_8859-5",
    "iso_8859-5:1988",
  ],
  "ISO-8859-6": [
    "arabic",
    "asmo-708",
    "csiso88596e",
    "csiso88596i",
    "csisolatinarabic",
    "ecma-114",
    "iso-8859-6",
    "iso-8859-6-e",
    "iso-8859-6-i",
    "iso-ir-127",
    "iso8859-6",
    "iso88596",
    "iso_8859-6",
    "iso_8859-6:1987",
  ],
  "ISO-8859-7": [
    "csisolatingreek",
    "ecma-118",
    "elot_928",
    "greek",
    "greek8",
    "iso-8859-7",
    "iso-ir-126",
    "iso8859-7",
    "iso88597",
    "iso_8859-7",
    "iso_8859-7:1987",
    "sun_eu_greek",
  ],
  "ISO-8859-8": [
    "csiso88598e",
    "csisolatinhebrew",
    "hebrew",
    "iso-8859-8",
    "iso-8859-8-e",
    "iso-ir-138",
    "iso8859-8",
    "iso88598",
    "iso_8859-8",
    "iso_8859-8:1988",
    "visual",
  ],
  "ISO-8859-8-I": [
    "csiso88598i",
    "iso-8859-8-i",
    "logical",
  ],
  "ISO-8859-10": [
    "csisolatin6",
    "iso-8859-10",
    "iso-ir-157",
    "iso8859-10",
    "iso885910",
    "l6",
    "latin6",
  ],
  "ISO-8859-13": [
    "iso-8859-13",
    "iso8859-13",
    "iso885913",
  ],
  "ISO-8859-14": [
    "iso-8859-14",
    "iso8859-14",
    "iso885914",
  ],
  "ISO-8859-15": [
    "csisolatin9",
    "iso-8859-15",
    "iso8859-15",
    "iso885915",
    "iso_8859-15",
    "l9",
  ],
  "ISO-8859-16": [
    "iso-8859-16",
  ],
  "KOI8-R": [
    "cskoi8r",
    "koi",
    "koi8",
    "koi8-r",
    "koi8_r",
  ],
  "KOI8-U": [
    "koi8-ru",
    "koi8-u",
  ],
  "macintosh": [
    "csmacintosh",
    "mac",
    "macintosh",
    "x-mac-roman",
  ],
  "windows-874": [
    "dos-874",
    "iso-8859-11",
    "iso8859-11",
    "iso885911",
    "tis-620",
    "windows-874",
  ],
  "windows-1250": [
    "cp1250",
    "windows-1250",
    "x-cp1250",
  ],
  "windows-1251": [
    "cp1251",
    "windows-1251",
    "x-cp1251",
  ],
  "windows-1252": [
    "ansi_x3.4-1968",
    "ascii",
    "cp1252",
    "cp819",
    "csisolatin1",
    "ibm819",
    "iso-8859-1",
    "iso-ir-100",
    "iso8859-1",
    "iso88591",
    "iso_8859-1",
    "iso_8859-1:1987",
    "l1",
    "latin1",
    "us-ascii",
    "windows-1252",
    "x-cp1252",
    // As we use <meta>, x-user-defined will map to windows-1252 per
    // https://html.spec.whatwg.org/multipage/#documentEncoding
    "x-user-defined"
  ],
  "windows-1253": [
    "cp1253",
    "windows-1253",
    "x-cp1253",
  ],
  "windows-1254": [
    "cp1254",
    "csisolatin5",
    "iso-8859-9",
    "iso-ir-148",
    "iso8859-9",
    "iso88599",
    "iso_8859-9",
    "iso_8859-9:1989",
    "l5",
    "latin5",
    "windows-1254",
    "x-cp1254",
  ],
  "windows-1255": [
    "cp1255",
    "windows-1255",
    "x-cp1255",
  ],
  "windows-1256": [
    "cp1256",
    "windows-1256",
    "x-cp1256",
  ],
  "windows-1257": [
    "cp1257",
    "windows-1257",
    "x-cp1257",
  ],
  "windows-1258": [
    "cp1258",
    "windows-1258",
    "x-cp1258",
  ],
  "x-mac-cyrillic": [
    "x-mac-cyrillic",
    "x-mac-ukrainian",
  ],
  "GBK": [
    "chinese",
    "csgb2312",
    "csiso58gb231280",
    "gb2312",
    "gb_2312",
    "gb_2312-80",
    "gbk",
    "iso-ir-58",
    "x-gbk",
  ],
  "gb18030": [
    "gb18030",
  ],
  "Big5": [
    "big5",
    "big5-hkscs",
    "cn-big5",
    "csbig5",
    "x-x-big5",
  ],
  "EUC-JP": [
    "cseucpkdfmtjapanese",
    "euc-jp",
    "x-euc-jp",
  ],
  "ISO-2022-JP": [
    "csiso2022jp",
    "iso-2022-jp",
  ],
  "Shift_JIS": [
    "csshiftjis",
    "ms932",
    "ms_kanji",
    "shift-jis",
    "shift_jis",
    "sjis",
    "windows-31j",
    "x-sjis",
  ],
  "EUC-KR": [
    "cseuckr",
    "csksc56011987",
    "euc-kr",
    "iso-ir-149",
    "korean",
    "ks_c_5601-1987",
    "ks_c_5601-1989",
    "ksc5601",
    "ksc_5601",
    "windows-949",
  ],
  "replacement": [
    "csiso2022kr",
    "hz-gb-2312",
    "iso-2022-cn",
    "iso-2022-cn-ext",
    "iso-2022-kr",
  ],
};

// Whether to test case and whitespace variants of every label in addition to
// the lowercase label itself. Currently disabled, so only the labels exactly
// as listed in encodingMap are tested.
// NOTE(review): the reason the variants were switched off is not recorded in
// this file. Before enabling, note that the padded variant contains tab, LF,
// FF and CR characters, which need percent-encoding to survive in a URL.
var includeLabelVariants = false;

// Returns the extra spellings of `label` that should resolve to the same
// encoding:
//   - the all-uppercase form (only if it differs from `label`),
//   - an alternating-case form that uppercases the characters at even
//     indices and leaves the rest untouched (only if it differs from both
//     `label` and the uppercase form),
//   - `label` wrapped in leading and trailing " \t\n\f\r".
function labelVariants(label) {
  var upper = label.toUpperCase();
  var mixed = "";
  for (var i = 0; i < label.length; i++) {
    mixed += (i % 2 === 0) ? label[i].toUpperCase() : label[i];
  }

  var variants = [];
  if (label != upper) {
    variants.push(upper);
  }
  if (label != mixed && upper != mixed) {
    variants.push(mixed);
  }
  variants.push(" \t\n\f\r" + label + " \t\n\f\r");
  return variants;
}

// Replace every label list in encodingMap with a fresh array holding each
// original label, followed by its variants when includeLabelVariants is set.
Object.keys(encodingMap).forEach(function(name) {
  var expanded = [];
  encodingMap[name].forEach(function(label) {
    expanded.push(label);
    if (includeLabelVariants) {
      Array.prototype.push.apply(expanded, labelVariants(label));
    }
  });
  encodingMap[name] = expanded;
});

// For every (name, label) pair in encodingMap, load encoding.py in an iframe
// with the label as its query parameter and, once it has loaded, check that
// the iframe's document reports `name` through document.characterSet and its
// aliases inputEncoding and charset. Each attribute gets its own async test
// so that failures are reported per alias.
// encoding.py is not part of this file; it is presumably a handler that
// serves a document declaring the given label via <meta> - confirm there.
Object.keys(encodingMap).forEach(function(name) {
  encodingMap[name].forEach(function(label) {
    var iframe = document.createElement("iframe");

    // Tests are created synchronously, in this order, so the harness knows
    // about all of them before any iframe finishes loading.
    var checks = ["characterSet", "inputEncoding", "charset"].map(function(attr) {
      return {
        attr: attr,
        test: async_test("Name " + format_value(name) +
                         " has label " + format_value(label) +
                         " (" + attr + ")"),
      };
    });

    // Percent-encode the label so that characters with special meaning in a
    // URL (or that URL parsing would strip, such as tab and newline) reach
    // the server unchanged as part of the query value.
    iframe.src = "encoding.py?label=" + encodeURIComponent(label);

    iframe.onload = function() {
      // Run every assertion before touching the iframe; contentDocument is
      // read inside the step so that a missing document fails the test
      // instead of throwing out of the load handler.
      checks.forEach(function(check) {
        check.test.step(function() {
          assert_equals(iframe.contentDocument[check.attr], name);
        });
      });
      // Drop the iframe as soon as it has been inspected, then complete the
      // tests.
      document.body.removeChild(iframe);
      checks.forEach(function(check) {
        check.test.done();
      });
    };
    document.body.appendChild(iframe);
  });
});
</script>
<!-- vim: set expandtab tabstop=2 shiftwidth=2: -->
